Preamble

Load Data

Subject / Visit Data

Microbiome Data

# Write a CSV of source/plate identifiers for every sample whose
# `sample.has_mgen` flag is true.
_sample = pd.read_csv('meta/sample.tsv', sep='\t', index_col='sample_id')

# Attach the plate-well and source-sample columns from the raw sample sheet.
_sample_with_plate = sample.join(_sample[['plate_well', 'source_sample_id']])

# Keep only the flagged rows, then the three exported columns.
_sequenced = _sample_with_plate[sample.has_mgen]
_sequenced[['source_sample_id', 'sample_notes', 'plate_well']].to_csv(
    'all_sequenced_samples.csv'
)

Taxonomy

Slice/Merge Taxa

# Summarise the donor-mean and baseline samples by taxonomic group:
#   1. keep rows whose sample_type is 'donor_mean' or 'baseline',
#   2. sum the columns that share a value of `sotu_to_taxonomy.p__`
#      (presumably the phylum label -- confirm against the taxonomy table),
#   3. average those sums over the selected samples and sort ascending.
#
# The grouping is done on the transposed table because
# `DataFrame.groupby(..., axis='columns')` is deprecated in pandas >= 2.1.
(
    sample_x_sotu
    .loc[sample.sample_type.isin(['donor_mean', 'baseline'])]
    .T
    .groupby(sotu_to_taxonomy.p__)
    .sum()
    .mean(axis='columns')
    .sort_values()
)

Sample Metadata

Results

Subsection: Study design and subject demographics

Subsection: FMT efficacy and impacts of treatment

Remission and responder status

# Print the treatment and outcome columns of `recipient` as a Markdown table.
_recipient_outcome_columns = [
    'treatment_abx_pre',
    'maintenance_',
    'donor_subject_id',
    'remission',
    'responder',
    'withdrawal_due_to_failure',
]
_recipient_outcome_table = recipient.loc[:, _recipient_outcome_columns]
print(_recipient_outcome_table.to_markdown())

Antibiotics

Enema vs. Capsules

Donor

Mayo Score Improvement

# Slope plot of each recipient's total Mayo score: baseline (x=0) against the
# follow-up time point (x=1), one line per recipient, coloured by study arm.
fig, ax = plt.subplots(figsize=(6, 10))

# Cast the withdrawal flag to float so it can be compared with 1.0, and give
# every recipient a small, evenly spaced x-offset so lines do not overlap.
d0 = recipient.assign(
    withdrawal_due_to_failure=lambda x: x.withdrawal_due_to_failure.astype(float),
    jitter=lambda x: np.linspace(-0.1, 0.1, num=len(x)),
)

shared_line_kws = dict(alpha=0.75)
withdrawn_line_kws = dict(linestyle='--', lw=2)
completed_line_kws = dict(linestyle='-', lw=3)

# NOTE: an earlier variant also encoded the donor as a marker shape; that
# encoding is not drawn here.
for subject_id, row in d0.iterrows():
    start_score = row['mayo_total_start']
    if np.isnan(start_score):
        # No baseline score: nothing to draw for this recipient.
        continue

    if row['withdrawal_due_to_failure'] == 1.0:
        # Withdrawn due to failure: draw a flat dashed line at the baseline
        # score, regardless of any recorded end score.
        end_score = start_score
        line_kws = withdrawn_line_kws
    else:
        end_score = row['mayo_total_end']
        if np.isnan(end_score):
            # No follow-up score and not a failure withdrawal: skip.
            continue
        line_kws = completed_line_kws

    ax.plot(
        [row['jitter'], 1 + row['jitter']],
        [start_score, end_score],
        color=lib.project_style.DEFAULT_COLOR_PALETTE[row['arm']],
        label='__nolegend__',
        **shared_line_kws,
        **line_kws,
    )

# Empty proxy lines so the legend shows exactly one entry per arm.
for arm in lib.project_style.ARM_ORDER:
    ax.plot(
        [],
        [],
        color=lib.project_style.DEFAULT_COLOR_PALETTE[arm],
        label=arm,
        lw=2,
        **shared_line_kws,
    )
ax.legend(bbox_to_anchor=(1, 1))

# Dotted horizontal reference line at a score of 2.1, drawn behind the data.
ax.axhline(2.1, lw=2, linestyle=':', color='grey', zorder=0)

ax.set_ylabel('Mayo Score')
ax.set_xticks([0, 1])
ax.set_xticklabels(['Baseline', '2-Week Follow-up'])

GEE with Partial Mayo Scores

Null Model

Subsection: Multi-modal longitudinal characterization of the microbiome

Data sizes

Subsection: Subject and donor clustering within datasets

# Count the rows of `sample_x_rotu` per (subject_id, sample_type) after
# left-joining the metadata table `m`, then spread sample types into columns.
_rotu_with_meta = sample_x_rotu.join(m, how='left')
_rows_per_group = _rotu_with_meta.groupby(['subject_id', 'sample_type']).apply(len)
_rows_per_group.unstack(level='sample_type')
def _p_to_s(p):
    """Map a p-value to a marker size.

    Returns 40 when ``p < 0.05``, 15 when ``0.05 <= p < 0.10`` and 0
    otherwise (any value failing both comparisons, e.g. NaN, also gives 0).
    """
    if p < 0.05:
        return 40
    elif p < 0.10:
        return 15
    else:
        return 0


# x-position of every sample type, in the project's simplified display order,
# plus the matching tick labels.
sample_type_order_idx = pd.Series(
    {v: i for i, v in enumerate(lib.project_data.SAMPLE_TYPE_ORDER_SIMPLE)}
)
sample_type_tick_labels = [
    lib.project_style.SAMPLE_TYPE_LABELS[sample_type]
    for sample_type in lib.project_data.SAMPLE_TYPE_ORDER_SIMPLE
]
assert len(sample_type_tick_labels) == len(sample_type_order_idx)

datatypes = ['sotu', 'motu', 'rotu', 'ko', 'chem_ba']
zorder_palette = {'sotu': 0, 'motu': 1, 'rotu': 2, 'ko': 3, 'chem_ba': 4}

# Vertical nudge applied to each significance annotation, keyed by the string
# returned from `lib.project_style.pvalue_to_annotation`.  A lookup with a
# default replaces the former if/elif chain, which left `yoffset` unbound (or
# stale from the previous point) for any annotation string not listed here.
donor_annotation_yoffset = {'∙': 0.02, '**': 0.01, '*': 0.01, '': 0}

# --- Figure: ANOSIM R by donor, one line per data type across sample types ---
fig, ax = plt.subplots(figsize=(4.5, 3))
for col in datatypes:
    color = lib.project_style.DEFAULT_COLOR_PALETTE[col]
    d0 = pd.DataFrame(
        dict(
            r=donor_anosim_r[col],
            p=donor_anosim_p[col],
            # Marker size per p-value; not used by the line plot below.
            s=donor_anosim_p[col].map(_p_to_s),
            x=sample_type_order_idx,
        )
    ).sort_values('x')

    # Pass the columns explicitly: `ax.plot('x', 'r', data=d0)` is ambiguous
    # because 'r' is also a valid format string, and matplotlib warns about it.
    ax.plot(
        d0['x'],
        d0['r'],
        label='__nolegend__',
        c=color,
        alpha=1.0,
        zorder=zorder_palette[col],
    )
    # Empty proxy artist so the legend entry shows both line and marker.
    ax.plot(
        [],
        [],
        marker='o',
        linestyle='-',
        label=lib.project_style.OMICS_NAME[col],
        c=color,
        alpha=1.0,
    )

    # Significance annotation centred just above each point.
    for _, d1 in d0.iterrows():
        annotation = lib.project_style.pvalue_to_annotation(d1.p)
        yoffset = donor_annotation_yoffset.get(annotation, 0)
        ax.annotate(
            annotation,
            xy=(d1.x, d1.r + yoffset),
            color=color,
            ha='center',
            va='center',
            zorder=zorder_palette[col],
        )

ax.set_ylim(-0.15, 1.15)
ax.legend(bbox_to_anchor=(0.4, 0.465))
ax.set_ylabel('ANOSIM R (donor)')
ax.set_xlabel('Time-point')
ax.set_xticks(range(len(sample_type_tick_labels)))
ax.set_xticklabels(sample_type_tick_labels)
# Panel letter, positioned in axes coordinates just above the top-left corner.
ax.text(-0.0, 1.05, 'E', fontsize=12, fontweight='heavy', transform=ax.transAxes)
fig.savefig('fig/multiomics_anosim_by_donor.pdf', bbox_inches='tight')

# --- Figure: stand-alone legend (no spines, no ticks) for the data types ---
fig, ax = plt.subplots(figsize=(1, 1))
for col in datatypes:
    ax.plot(
        [],
        [],
        marker='o',
        linestyle='-',
        label=lib.project_style.OMICS_NAME[col],
        c=lib.project_style.DEFAULT_COLOR_PALETTE[col],
        alpha=0.8,
    )
ax.legend(ncol=1)
for spines in ['top', 'right', 'bottom', 'left']:
    ax.spines[spines].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
fig.savefig('fig/multiomics_line_point_legend.pdf', bbox_inches='tight')
# Figure: ANOSIM R by subject, one point per data type at a single x-position.
fig, ax = plt.subplots(figsize=(1, 3))
datatypes = ['sotu', 'motu', 'rotu', 'ko', 'chem_ba']

# Vertical nudge applied to each significance annotation, keyed by the string
# returned from `lib.project_style.pvalue_to_annotation`.  A lookup with a
# default replaces the former if/elif chain, which left `yoffset` unbound (or
# stale from the previous data type) for any annotation string not listed here.
subject_annotation_yoffset = {'∙': 0.03, '**': 0.02, '*': 0.02, '': 0}

d0 = subject_anosim_results.set_index('dataset')
for col in datatypes:
    d1 = d0.loc[col]
    color = lib.project_style.DEFAULT_COLOR_PALETTE[col]
    ax.scatter(
        [0],
        d1.r,
        marker='o',
        s=20,
        label=lib.project_style.OMICS_NAME[col],
        c=color,
        alpha=1.,
    )
    annotation = lib.project_style.pvalue_to_annotation(d1.p)
    yoffset = subject_annotation_yoffset.get(annotation, 0)
    ax.annotate(
        annotation,
        xy=(0, d1.r + yoffset),
        color=color,
        ha='center',
        va='center',
        alpha=1.,
    )

ax.set_ylim(-0.15, 1.15)
ax.set_xticks([0])
ax.set_xticklabels(['D1-F2 (pooled)'])
# Use the explicit Axes rather than pyplot's implicit current axes.
ax.set_ylabel('ANOSIM R (subject)')
# Panel letter, positioned in axes coordinates just above the top-left corner.
ax.text(-0.0, 1.05, 'F', fontsize=12, fontweight='heavy', transform=ax.transAxes)
fig.savefig('fig/multiomics_anosim_by_subject.pdf', bbox_inches='tight')

Subsection: Transfer and engraftment of donor taxa in patients

ASV-level analysis

Species-level analysis

Strain-level analysis

Negative control to assess false positive rate of assigning strain "transfer" events

Strain transfer differs over phyla

Subsection: Antibiotics and donors may modulate effects of FMT on the microbiome

GEE on donor/subject similarity profiles

Load/Shape Data

Null Model

Antibiotics

Maintenance Delivery

Donor Subject ID

Antibiotics compared to alternative null

Maintenance method compared to alternative null

Responder Status

Fraction of donor strains transferred as a function of treatment

Subsection: Broad correlations between datasets